16S-rDNA-V3-V4 repository: https://github.com/ycl6/16S-rDNA-V3-V4
phyloseq: GitHub, Documentation, Paper
GUniFrac: Paper
Demo Dataset: PRJEB27564 from Gut Microbiota in Parkinson’s Disease: Temporal Stability and Relations to Disease Progression. EBioMedicine. 2019;44:691-707
License: GPL-3.0
Rcd /ngs/16S-Demo/PRJEB27564
R
library("data.table")
library("phyloseq")
library("DESeq2")
library("ggplot2")
library("ggbeeswarm")
library("ggrepel")
library("vegan")
library("dplyr")Set the full-path to additional scripts
Check phyloseq object
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 7712 taxa and 126 samples ]
## sample_data() Sample Data: [ 126 samples by 7 sample variables ]
## tax_table() Taxonomy Table: [ 7712 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 7712 tips and 7710 internal nodes ]
ps using subset_taxaHere, we limit the taxa to those from Bacteria, Phylum and Class in not unassigned, and not belonging to the Mitochondria Family
## Kingdom Phylum Class Order Family Genus
## 1: Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae Subdoligranulum
## 2: Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae Subdoligranulum
## 3: Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae Subdoligranulum
## 4: Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae Subdoligranulum
## 5: Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae Subdoligranulum
## ---
## 6536: Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae Subdoligranulum
## 6537: Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae Subdoligranulum
## 6538: Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae Subdoligranulum
## 6539: Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae Subdoligranulum
## 6540: Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae Subdoligranulum
## Species TotalCounts SV
## 1: <NA> 4311 SV0341
## 2: <NA> 274 SV1878
## 3: <NA> 36 SV4447
## 4: <NA> 176 SV2333
## 5: <NA> 18 SV5394
## ---
## 6536: <NA> 173 SV2351
## 6537: <NA> 18 SV5395
## 6538: <NA> 6 SV6574
## 6539: <NA> 15873 SV0105
## 6540: <NA> 456910 SV0001
ggplot(tdt, aes(TotalCounts)) + geom_histogram(bins = 50) + theme_bw() +
ggtitle("Histogram of Total Counts")## [1] 6
## [1] 228
# taxa cumulative sum
taxcumsum = tdt[, .N, by = TotalCounts]
setkey(taxcumsum, TotalCounts)
taxcumsum[, CumSum := cumsum(N)]
# Plot the cumulative sum of ASVs against the total counts
pCumSum = ggplot(taxcumsum, aes(TotalCounts, CumSum)) + geom_point() +
theme_bw() + xlab("Filtering Threshold") + ylab("ASV Filtered")
png("images/FilterTaxa-taxa-abundance.png", width = 8, height = 8, units = "in", res = 300)
gridExtra::grid.arrange(pCumSum, pCumSum + xlim(0, 500),
pCumSum + xlim(0, 100), pCumSum + xlim(0, 50), nrow = 2,
top = "ASVs that would be filtered vs. minimum taxa counts threshold")
invisible(dev.off())“images/FilterTaxa-taxa-abundance.png”
## Kingdom Phylum Class Order Family Genus
## 1: Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae Subdoligranulum
## 2: Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae Subdoligranulum
## 3: Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae Subdoligranulum
## 4: Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae Subdoligranulum
## 5: Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae Subdoligranulum
## ---
## 824036: Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae Subdoligranulum
## 824037: Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae Subdoligranulum
## 824038: Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae Subdoligranulum
## 824039: Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae Subdoligranulum
## 824040: Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae Subdoligranulum
## Species TaxaID SampleID count RelativeAbundance
## 1: <NA> SV0001 C0005B 4573 0.08878405
## 2: <NA> SV0001 C0009B 910 0.01732113
## 3: <NA> SV0001 C0019B 2059 0.02722465
## 4: <NA> SV0001 C0020B 1630 0.04075713
## 5: <NA> SV0001 C0024B 25665 0.25073271
## ---
## 824036: <NA> SV7712 P0120F 0 0.00000000
## 824037: <NA> SV7712 P0045F 0 0.00000000
## 824038: <NA> SV7712 P0045B 0 0.00000000
## 824039: <NA> SV7712 P0083F 0 0.00000000
## 824040: <NA> SV7712 P0083B 0 0.00000000
prevdt = mdt[, list(Prevalence = sum(count > 0),
TotalCounts = sum(count),
MaxCounts = max(count)), by = TaxaID]
prevdt## TaxaID Prevalence TotalCounts MaxCounts
## 1: SV0001 125 456910 25665
## 2: SV0002 95 325977 20183
## 3: SV0003 109 310061 18097
## 4: SV0004 117 295553 17406
## 5: SV0005 111 283113 19696
## ---
## 6536: SV7708 1 1 1
## 6537: SV7709 1 1 1
## 6538: SV7710 1 1 1
## 6539: SV7711 1 1 1
## 6540: SV7712 1 1 1
ggplot(prevdt, aes(Prevalence)) + geom_histogram(bins = 50) + theme_bw() +
ggtitle("Histogram of Taxa Prevalence")## [1] 2988
## [1] 1183
ggplot(prevdt, aes(MaxCounts)) + geom_histogram(bins = 100) + xlim(0, 500) +
theme_bw() + ggtitle("Histogram of Maximum TotalCounts")##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
## 6 240 230 196 181 165 172 125 135 114 107 105 96 85 85 88 73 84 58 58 74 64
## 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
## 48 48 42 56 47 57 48 41 49 45 51 38 46 41 43 38 40 42 29 33 36 35
## 45 46 47 48 49 50
## 27 39 29 34 29 29
## TaxaID Prevalence TotalCounts MaxCounts
## 1: SV7707 1 1 1
## 2: SV7708 1 1 1
## 3: SV7709 1 1 1
## 4: SV7710 1 1 1
## 5: SV7711 1 1 1
## 6: SV7712 1 1 1
# taxa cumulative sum
prevcumsum = prevdt[, .N, by = Prevalence]
setkey(prevcumsum, Prevalence)
prevcumsum[, CumSum := cumsum(N)]
# Plot the cumulative sum of ASVs against the prevalence
pPrevCumSum = ggplot(prevcumsum, aes(Prevalence, CumSum)) + geom_point(size = 2, alpha = 0.5) +
theme_bw() + xlab("Filtering Threshold") + ylab("ASVs Filtered") +
ggtitle("ASVs that would be filtered vs. minimum sample count threshold")
png("images/FilterTaxa-taxa-prevalence.png", width = 8, height = 8, units = "in", res = 300)
pPrevCumSum
invisible(dev.off())“images/FilterTaxa-taxa-prevalence.png”
Prevalence vs. Total Count Scatter plot
png("images/FilterTaxa-Prevalence-TotalCounts.png", width = 8, height = 8, units = "in", res = 300)
ggplot(prevdt, aes(Prevalence, TotalCounts)) + geom_point(size = 2, alpha = 0.5) +
scale_y_log10() + theme_bw() + xlab("Prevalence [No. Samples]") + ylab("TotalCounts [Taxa]")
invisible(dev.off())“images/FilterTaxa-Prevalence-TotalCounts.png”
Colored by phylum
addPhylum = unique(copy(mdt[, list(TaxaID, Phylum)]))
# Join by TaxaID
setkey(prevdt, TaxaID)
setkey(addPhylum, TaxaID)
prevdt = addPhylum[prevdt]
setkey(prevdt, Phylum)
png("images/FilterTaxa-Prevalence-TotalCounts-Phylum.png", width = 8, height = 8,
units = "in", res = 300)
ggplot(prevdt, aes(Prevalence, TotalCounts, color = Phylum)) +
geom_point(size = 1, alpha = 0.5) + scale_y_log10() + theme_bw() +
facet_wrap(~Phylum, nrow = 4) + theme(legend.position="none") +
xlab("Prevalence [No. Samples]") + ylab("Total Abundance")
invisible(dev.off())“images/FilterTaxa-Prevalence-TotalCounts-Phylum.png”
Remember to review the images/FilterTaxa-*.png plots to select the best paramters
keepTaxa = prevdt[(Prevalence > prevalenceThreshold &
TotalCounts > abundanceThreshold &
MaxCounts > maxThreshold), TaxaID]
ps1 = prune_taxa(keepTaxa, ps0)
phy_tree(ps1) = ape::root(phy_tree(ps1), sample(taxa_names(ps1), 1), resolve.root = TRUE)
ps1## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 1425 taxa and 126 samples ]
## sample_data() Sample Data: [ 126 samples by 7 sample variables ]
## tax_table() Taxonomy Table: [ 1425 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 1425 tips and 1424 internal nodes ]
write.table(as.data.table(otu_table(ps1), keep.rownames = T), file = "outfiles/expr.otu_table.txt",
sep = "\t", quote = F, row.names = F, col.names = T)
write.table(as.data.table(tax_table(ps1), keep.rownames = T), file = "outfiles/expr.tax_table.txt",
sep = "\t", quote = F, row.names = F, col.names = T)write.table(reshape2::dcast(ps1 %>% psmelt() %>% arrange(OTU) %>% rename(ASV = OTU),
ASV+Kingdom+Phylum+Class+Order+Family+Genus+Species~Sample_ID, value.var = "Abundance"),
file = "outfiles/expr.abundance.all.txt", sep = "\t",
quote = F, row.names = F, col.names = T)
write.table(reshape2::dcast(ps1 %>% tax_glom(taxrank = "Phylum") %>% psmelt(),
Phylum~Sample_ID, value.var = "Abundance"),
file = "outfiles/expr.abundance.abphy.txt", sep = "\t",
quote = F, row.names = F, col.names = T)
write.table(reshape2::dcast(ps1 %>% tax_glom(taxrank = "Class") %>% psmelt(),
Class~Sample_ID, value.var = "Abundance"),
file = "outfiles/expr.abundance.abcls.txt", sep = "\t",
quote = F, row.names = F, col.names = T)
write.table(reshape2::dcast(ps1 %>% tax_glom(taxrank = "Family") %>% psmelt(),
Family~Sample_ID, value.var = "Abundance"),
file = "outfiles/expr.abundance.abfam.txt", sep = "\t",
quote = F, row.names = F, col.names = T)
write.table(reshape2::dcast(ps1 %>% tax_glom(taxrank = "Genus") %>% psmelt(),
Genus~Sample_ID, value.var = "Abundance"),
file = "outfiles/expr.abundance.abgen.txt", sep = "\t",
quote = F, row.names = F, col.names = T)Use the transform_sample_counts function to transforms the sample counts of a taxa abundance matrix according to the provided function, in the case the fractions of the whole sum
write.table(reshape2::dcast(ps1 %>% transform_sample_counts(function(x) {x/sum(x)} ) %>%
psmelt() %>% arrange(OTU) %>% rename(ASV = OTU),
ASV+Kingdom+Phylum+Class+Order+Family+Genus+Species~Sample_ID, value.var = "Abundance"),
file = "outfiles/expr.relative_abundance.all.txt", sep = "\t",
quote = F, row.names = F, col.names = T)
write.table(reshape2::dcast(ps1 %>% tax_glom(taxrank = "Phylum") %>%
transform_sample_counts(function(x) {x/sum(x)} ) %>%
psmelt(), Phylum~Sample_ID, value.var = "Abundance"),
file = "outfiles/expr.relative_abundance.abphy.txt", sep = "\t",
quote = F, row.names = F, col.names = T)
write.table(reshape2::dcast(ps1 %>% tax_glom(taxrank = "Class") %>%
transform_sample_counts(function(x) {x/sum(x)} ) %>%
psmelt(), Class~Sample_ID, value.var = "Abundance"),
file = "outfiles/expr.relative_abundance.abcls.txt", sep = "\t",
quote = F, row.names = F, col.names = T)
write.table(reshape2::dcast(ps1 %>% tax_glom(taxrank = "Family") %>%
transform_sample_counts(function(x) {x/sum(x)} ) %>%
psmelt(), Family~Sample_ID, value.var = "Abundance"),
file = "outfiles/expr.relative_abundance.abfam.txt", sep = "\t",
quote = F, row.names = F, col.names = T)
write.table(reshape2::dcast(ps1 %>% tax_glom(taxrank = "Genus") %>%
transform_sample_counts(function(x) {x/sum(x)} ) %>%
psmelt(), Genus~Sample_ID, value.var = "Abundance"),
file = "outfiles/expr.relative_abundance.abgen.txt", sep = "\t",
quote = F, row.names = F, col.names = T)# Transform to proportions (relative abundances)
ps1.rp = transform_sample_counts(ps1, function(OTU) OTU/sum(OTU))
# Top N taxa
N = 200
topN = names(sort(taxa_sums(ps1), decreasing=TRUE))[1:N]
ps1.topN = transform_sample_counts(ps1, function(OTU) OTU/sum(OTU))
ps1.topN = prune_taxa(topN, ps1.topN)
ps1.topN## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 200 taxa and 126 samples ]
## sample_data() Sample Data: [ 126 samples by 7 sample variables ]
## tax_table() Taxonomy Table: [ 200 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 200 tips and 199 internal nodes ]
# All taxa
ptaxa1 = plot_bar(ps1.rp, x = "Sample_ID", fill = "Phylum",
title = paste(ntaxa(ps1.rp), "Taxa (Phylum)")) +
geom_bar(stat = "identity", size = 0.1, color = "black") +
facet_wrap(~Group2, scales = "free_x", nrow = 1) + guides(fill = guide_legend(ncol = 1)) +
scale_y_continuous(breaks = seq(0, 1, 0.1)) +
theme(legend.title = element_text(size = 8), legend.text = element_text(size = 7),
axis.text.x = element_text(size = 5, angle = 90, vjust = 0.5, hjust = 1),
axis.text.y = element_text(size = 12)) + xlab("Sample") + ylab("Relative abundance")
# Top 200 taxa
ptaxa2 = plot_bar(ps1.topN, x = "Sample_ID", fill = "Phylum",
title = paste("Top",N, "Taxa (Phylum)")) +
geom_bar(stat = "identity", size = 0.1, color = "black") +
facet_wrap(~Group2, scales = "free_x", nrow = 1) + guides(fill = guide_legend(ncol = 1)) +
scale_y_continuous(breaks = seq(0, 1, 0.1)) +
theme(legend.title = element_text(size = 8), legend.text = element_text(size = 7),
axis.text.x = element_text(size = 5, angle = 90, vjust = 0.5, hjust = 1),
axis.text.y = element_text(size = 12)) + xlab("Sample") + ylab("Relative Abundance")
png("images/plot_bar_phylum.png", width = 10, height = 8, units = "in", res = 300)
gridExtra::grid.arrange(ptaxa1, ptaxa2, nrow = 2)
invisible(dev.off())“images/plot_bar_phylum.png”
ptaxa3 = plot_bar(ps1.rp, x = "Sample_ID", fill = "Class",
title = paste(ntaxa(ps1.rp), "Taxa (Class)")) +
geom_bar(stat = "identity", size = 0.1, color = "black") +
facet_wrap(~Group2, scales = "free_x", nrow = 1) + guides(fill = guide_legend(ncol = 1)) +
scale_y_continuous(breaks = seq(0, 1, 0.1)) +
theme(legend.title = element_text(size = 8), legend.text = element_text(size = 7),
axis.text.x = element_text(size = 5, angle = 90, vjust = 0.5, hjust = 1),
axis.text.y = element_text(size = 12)) + xlab("Sample") + ylab("Relative abundance")
ptaxa4 = plot_bar(ps1.topN, x = "Sample_ID", fill = "Class",
title = paste("Top",N, "Taxa (Class)")) +
geom_bar(stat = "identity", size = 0.1, color = "black") +
facet_wrap(~Group2, scales = "free_x", nrow = 1) + guides(fill = guide_legend(ncol = 1)) +
scale_y_continuous(breaks = seq(0, 1, 0.1)) +
theme(legend.title = element_text(size = 8), legend.text = element_text(size = 7),
axis.text.x = element_text(size = 5, angle = 90, vjust = 0.5, hjust = 1),
axis.text.y = element_text(size = 12)) + xlab("Sample") + ylab("Relative Abundance")
png("images/plot_bar_class.png", width = 10, height = 8, units = "in", res = 300)
gridExtra::grid.arrange(ptaxa3, ptaxa4, nrow = 2)
invisible(dev.off())“images/plot_bar_class.png”
ptaxa5 = plot_bar(ps1.topN, x = "Sample_ID", fill = "Family",
title = paste("Top",N, "Taxa (Family)")) +
geom_bar(stat = "identity", size = 0.1, color = "black") +
facet_wrap(~Group2, scales = "free_x", nrow = 1) + guides(fill = guide_legend(ncol = 1)) +
scale_y_continuous(breaks = seq(0, 1, 0.1)) +
theme(legend.title = element_text(size = 8), legend.text = element_text(size = 7),
axis.text.x = element_text(size = 5, angle = 90, vjust = 0.5, hjust = 1),
axis.text.y = element_text(size = 12)) + xlab("Sample") + ylab("Relative Abundance")
png("images/plot_bar_family.png", width = 10, height = 8, units = "in", res = 300)
ptaxa5
invisible(dev.off())“images/plot_bar_family.png”
ptaxa6 = plot_bar(ps1.topN, x = "Sample_ID", fill = "Genus",
title = paste("Top",N, "Taxa (Genus)")) +
geom_bar(stat = "identity", size = 0.1, color = "black") +
facet_wrap(~Group2, scales = "free_x", nrow = 1) + guides(fill = guide_legend(ncol = 2)) +
scale_y_continuous(breaks = seq(0, 1, 0.1)) +
theme(legend.title = element_text(size = 8), legend.text = element_text(size = 7),
axis.text.x = element_text(size = 5, angle = 90, vjust = 0.5, hjust = 1),
axis.text.y = element_text(size = 12)) + xlab("Sample") + ylab("Relative Abundance")
png("images/plot_bar_genus.png", width = 10, height = 8, units = "in", res = 300)
ptaxa6
invisible(dev.off())“images/plot_bar_genus.png”
Explore alpha diversity using the plot_richness function
# Select alpha-diversity measures
betaM = c("Observed", "Chao1", "Shannon", "Simpson")
png("images/plot_richness.png", width = 6, height = 5, units = "in", res = 300)
plot_richness(ps1, x = "Group2", measures = betaM, color = "Group2", nrow = 1) +
geom_point(size = 0.8) + theme_bw() +
theme(legend.position = "none",
axis.text.x = element_text(size = 10, angle = 90, vjust = 0.5, hjust = 1),
axis.text.y = element_text(size = 10)) +
labs(x = "Group", y = "Alpha Diversity Measure")
invisible(dev.off())“images/plot_richness.png”
Create a long-format data.frame and plot with ggplot function
ad = estimate_richness(ps1, measures = betaM)
ad = merge(data.frame(sample_data(ps1)), ad, by = "row.names")
ad = reshape2::melt(ad[,c("Sample_ID", "Group2", betaM)], id = c("Sample_ID", "Group2"))
png("images/plot_richness_boxplot.png", width = 6, height = 5, units = "in", res = 300)
ggplot(ad, aes(Group2, value, color = Group2)) +
geom_boxplot(outlier.shape = NA, size = 0.8, width = 0.8) +
geom_quasirandom(size = 0.8, color = "black") + theme_bw() +
facet_wrap(~ variable, scales = "free_y", nrow = 1) +
theme(legend.position = "none",
axis.text.x = element_text(size = 10, angle = 90, vjust = 0.5, hjust = 1),
axis.text.y = element_text(size = 10)) +
labs(x = "Group", y = "Alpha Diversity Measure")
invisible(dev.off())“images/plot_richness_boxplot.png”
Perform various flavors of unifrac measurements using the GUniFrac package
Perform ordination
# Calculate geometric means prior to estimate size factors
gm_mean = function(x, na.rm = TRUE){
exp(sum(log(x[x > 0]), na.rm = na.rm) / length(x))
}
# Perform variance stabilizing transformation
ps1.vsd = ps1
dds = phyloseq_to_deseq2(ps1, ~ Group2)## converting counts to integer mode
geoMeans = apply(counts(dds), 1, gm_mean)
dds = estimateSizeFactors(dds, geoMeans = geoMeans)
dds = estimateDispersions(dds)## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
abund.vsd = getVarianceStabilizedData(dds)
abund.vsd[abund.vsd < 0] = 0 # set negative values to 0
otu_table(ps1.vsd) = otu_table(abund.vsd, taxa_are_rows = TRUE)
# Create distance objects
dist_un = as.dist(unifracs[, , "d_UW"]) # Unweighted UniFrac
attr(dist_un, "method") = "Unweighted UniFrac"
dist_wu = as.dist(unifracs[, , "d_1"]) # Weighted UniFrac
attr(dist_wu, "method") = "Weighted UniFrac"
dist_vu = as.dist(unifracs[, , "d_VAW"]) # Variance-adjusted-weighted UniFrac
attr(dist_vu, "method") = "Variance-adjusted-weighted UniFrac"
dist_gu = as.dist(unifracs[, , "d_0.5"]) # GUniFrac with alpha 0.5
attr(dist_gu, "method") = "GUniFrac with alpha 0.5"
dist_bc = phyloseq::distance(ps1.vsd, "bray") # Bray-Curtis
# Perform ordination
ord_un = ordinate(ps1, method = "PCoA", distance = dist_un)
ord_wu = ordinate(ps1, method = "PCoA", distance = dist_wu)
ord_vu = ordinate(ps1, method = "PCoA", distance = dist_vu)
ord_gu = ordinate(ps1, method = "PCoA", distance = dist_gu)
set.seed(12345)
ord_bc = ordinate(ps1, method = "NMDS", distance = dist_bc)## Run 0 stress 0.1643532
## Run 1 stress 0.1643515
## ... New best solution
## ... Procrustes: rmse 0.0004348073 max resid 0.003627076
## ... Similar to previous best
## Run 2 stress 0.4123846
## Run 3 stress 0.1647262
## ... Procrustes: rmse 0.007101905 max resid 0.07513243
## Run 4 stress 0.1647288
## ... Procrustes: rmse 0.006996405 max resid 0.0753685
## Run 5 stress 0.1660952
## Run 6 stress 0.1657001
## Run 7 stress 0.1643496
## ... New best solution
## ... Procrustes: rmse 0.0007611539 max resid 0.007539293
## ... Similar to previous best
## Run 8 stress 0.1793736
## Run 9 stress 0.1656961
## Run 10 stress 0.1643504
## ... Procrustes: rmse 0.0008262269 max resid 0.006127567
## ... Similar to previous best
## Run 11 stress 0.1657059
## Run 12 stress 0.1647458
## ... Procrustes: rmse 0.005757801 max resid 0.06097133
## Run 13 stress 0.1647282
## ... Procrustes: rmse 0.007056738 max resid 0.07573969
## Run 14 stress 0.1647293
## ... Procrustes: rmse 0.007042765 max resid 0.07538188
## Run 15 stress 0.189651
## Run 16 stress 0.1657084
## Run 17 stress 0.1661109
## Run 18 stress 0.1788554
## Run 19 stress 0.1647292
## ... Procrustes: rmse 0.006985754 max resid 0.07564235
## Run 20 stress 0.1657013
## *** Solution reached
Output to PDF
pdf("images/plot_ordination.pdf", width = 6, height = 5, pointsize = 12)
plot_ordination(ps1, ord_un, type = "samples", color = "Group2",
title = "PCoA on unweighted UniFrac") +
theme_bw() + coord_fixed(ratio = 1) +
stat_ellipse(aes(group = Group2), type = "t", size = 0.5, linetype = "dashed")
plot_ordination(ps1, ord_wu, type = "samples", color = "Group2",
title = "PCoA on weighted UniFrac") +
theme_bw() + coord_fixed(ratio = 1) +
stat_ellipse(aes(group = Group2), type = "t", size = 0.5, linetype = "dashed")
plot_ordination(ps1, ord_vu, type = "samples", color = "Group2",
title = "PCoA on Variance adjusted weighted UniFrac") +
theme_bw() + coord_fixed(ratio = 1) +
stat_ellipse(aes(group = Group2), type = "t", size = 0.5, linetype = "dashed")
plot_ordination(ps1, ord_gu, type = "samples", color = "Group2",
title = "PCoA on GUniFrac with alpha 0.5") +
theme_bw() + coord_fixed(ratio = 1) +
stat_ellipse(aes(group = Group2), type = "t", size = 0.5, linetype = "dashed")
plot_ordination(ps1, ord_bc, type = "samples", color = "Group2",
title = "NMDS on Bray-Curtis distance (VST)") +
theme_bw() + coord_fixed(ratio = 1) +
stat_ellipse(aes(group = Group2), type = "t", size = 0.5, linetype = "dashed")
invisible(dev.off())Example: PCoA on weighted UniFrac
Permutational multivariate analysis of variance (PERMANOVA) test for differences between independent groups using the adonis function
##
## Call:
## adonis(formula = dist_un ~ Group2, data = data.frame(sample_data(ps1)), permutations = 1000)
##
## Permutation: free
## Number of permutations: 1000
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Group2 3 0.3542 0.11805 1.163 0.0278 0.1339
## Residuals 122 12.3842 0.10151 0.9722
## Total 125 12.7383 1.0000
##
## Call:
## adonis(formula = dist_wu ~ Group2, data = data.frame(sample_data(ps1)), permutations = 1000)
##
## Permutation: free
## Number of permutations: 1000
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Group2 3 0.1154 0.038455 1.2639 0.03014 0.1808
## Residuals 122 3.7118 0.030425 0.96986
## Total 125 3.8272 1.00000
##
## Call:
## adonis(formula = dist_vu ~ Group2, data = data.frame(sample_data(ps1)), permutations = 1000)
##
## Permutation: free
## Number of permutations: 1000
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Group2 3 0.08185 0.027284 1.1939 0.02852 0.2468
## Residuals 122 2.78803 0.022853 0.97148
## Total 125 2.86989 1.00000
##
## Call:
## adonis(formula = dist_gu ~ Group2, data = data.frame(sample_data(ps1)), permutations = 1000)
##
## Permutation: free
## Number of permutations: 1000
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Group2 3 0.3387 0.11291 1.1061 0.02648 0.2228
## Residuals 122 12.4546 0.10209 0.97352
## Total 125 12.7933 1.00000
##
## Call:
## adonis(formula = dist_bc ~ Group2, data = data.frame(sample_data(ps1)), permutations = 1000)
##
## Permutation: free
## Number of permutations: 1000
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Group2 3 0.6685 0.22283 1.1591 0.02771 0.1179
## Residuals 122 23.4537 0.19224 0.97229
## Total 125 24.1222 1.00000
Test for homogeneity condition among groups using the betadisper function
disp1 = betadisper(dist_un, group = data.frame(sample_data(ps1))$Group2)
permutest(disp1, permutations = 1000)##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 1000
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 3 0.002917 0.00097247 0.5971 1000 0.6284
## Residuals 122 0.198686 0.00162857
disp2 = betadisper(dist_wu, group = data.frame(sample_data(ps1))$Group2)
permutest(disp2, permutations = 1000)##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 1000
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 3 0.006617 0.0022055 1.0552 1000 0.3536
## Residuals 122 0.254995 0.0020901
disp3 = betadisper(dist_vu, group = data.frame(sample_data(ps1))$Group2)
permutest(disp3, permutations = 1000)##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 1000
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 3 0.00155 0.00051714 0.1818 1000 0.9151
## Residuals 122 0.34708 0.00284493
disp4 = betadisper(dist_gu, group = data.frame(sample_data(ps1))$Group2)
permutest(disp4, permutations = 1000)##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 1000
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 3 0.004338 0.0014459 0.5911 1000 0.6014
## Residuals 122 0.298427 0.0024461
disp5 = betadisper(dist_bc, group = data.frame(sample_data(ps1))$Group2)
permutest(disp5, permutations = 1000)##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 1000
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 3 0.01867 0.006223 1.7644 1000 0.1638
## Residuals 122 0.43029 0.003527
Example: Dispersion of unweighted UniFrac
## R version 3.6.2 (2019-12-12)
## Platform: x86_64-conda_cos6-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.4 LTS
##
## Matrix products: default
## BLAS/LAPACK: /home/ihsuan/miniconda3/envs/p27/lib/libopenblasp-r0.3.7.so
##
## locale:
## [1] LC_CTYPE=en_GB.UTF-8 LC_NUMERIC=C LC_TIME=en_GB.UTF-8
## [4] LC_COLLATE=en_GB.UTF-8 LC_MONETARY=en_GB.UTF-8 LC_MESSAGES=en_GB.UTF-8
## [7] LC_PAPER=en_GB.UTF-8 LC_NAME=C LC_ADDRESS=C
## [10] LC_TELEPHONE=C LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets methods
## [9] base
##
## other attached packages:
## [1] dplyr_1.0.0 vegan_2.5-6 lattice_0.20-41
## [4] permute_0.9-5 ggrepel_0.8.2 ggbeeswarm_0.6.0
## [7] ggplot2_3.3.2 DESeq2_1.26.0 SummarizedExperiment_1.16.1
## [10] DelayedArray_0.12.3 BiocParallel_1.20.1 matrixStats_0.56.0
## [13] Biobase_2.46.0 GenomicRanges_1.38.0 GenomeInfoDb_1.22.1
## [16] IRanges_2.20.2 S4Vectors_0.24.4 BiocGenerics_0.32.0
## [19] phyloseq_1.30.0 data.table_1.12.8 knitr_1.29
##
## loaded via a namespace (and not attached):
## [1] colorspace_1.4-1 hwriter_1.3.2 ellipsis_0.3.1
## [4] htmlTable_2.0.1 XVector_0.26.0 base64enc_0.1-3
## [7] rstudioapi_0.11 farver_2.0.3 bit64_0.9-7
## [10] AnnotationDbi_1.48.0 codetools_0.2-16 splines_3.6.2
## [13] geneplotter_1.64.0 dada2_1.14.1 ade4_1.7-15
## [16] Formula_1.2-3 jsonlite_1.7.0 Rsamtools_2.2.3
## [19] annotate_1.64.0 cluster_2.1.0 png_0.1-7
## [22] compiler_3.6.2 backports_1.1.8 Matrix_1.2-18
## [25] acepack_1.4.1 htmltools_0.5.0 tools_3.6.2
## [28] igraph_1.2.5 gtable_0.3.0 glue_1.4.1
## [31] GenomeInfoDbData_1.2.2 reshape2_1.4.4 ShortRead_1.44.3
## [34] Rcpp_1.0.5 vctrs_0.3.1 Biostrings_2.54.0
## [37] multtest_2.42.0 ape_5.4 nlme_3.1-148
## [40] iterators_1.0.12 xfun_0.15 stringr_1.4.0
## [43] lifecycle_0.2.0 XML_3.98-1.20 zlibbioc_1.32.0
## [46] MASS_7.3-51.6 scales_1.1.1 biomformat_1.14.0
## [49] rhdf5_2.30.1 RColorBrewer_1.1-2 yaml_2.2.1
## [52] memoise_1.1.0 gridExtra_2.3 rpart_4.1-15
## [55] latticeExtra_0.6-29 stringi_1.4.6 RSQLite_2.2.0
## [58] highr_0.8 genefilter_1.68.0 foreach_1.5.0
## [61] checkmate_2.0.0 rlang_0.4.6 pkgconfig_2.0.3
## [64] bitops_1.0-6 evaluate_0.14 purrr_0.3.4
## [67] Rhdf5lib_1.8.0 GenomicAlignments_1.22.1 htmlwidgets_1.5.1
## [70] labeling_0.3 bit_1.1-15.2 tidyselect_1.1.0
## [73] plyr_1.8.6 magrittr_1.5 R6_2.4.1
## [76] generics_0.0.2 Hmisc_4.4-0 DBI_1.1.0
## [79] pillar_1.4.5 foreign_0.8-73 withr_2.2.0
## [82] mgcv_1.8-31 survival_3.2-3 RCurl_1.98-1.2
## [85] nnet_7.3-14 tibble_3.0.2 crayon_1.3.4
## [88] rmarkdown_2.3 jpeg_0.1-8.1 locfit_1.5-9.4
## [91] grid_3.6.2 blob_1.2.1 digest_0.6.25
## [94] xtable_1.8-4 RcppParallel_5.0.2 munsell_0.5.0
## [97] beeswarm_0.2.3 vipor_0.4.5